// Point Stata at the replication folder. Every path used below is relative
// to this directory, so this is the only line to edit on another machine.
cd "C:\Users\johna\Dropbox (Curiel Analytx)\ElectionNightResults\replication_code"

// Load the state-level data file.
use "data\nyt_states.dta", clear

// Discard the total and maxtotal columns carried over from earlier data.
drop total maxtotal

// Remove the dummy rows that had been added before: those stamped at exactly
// 60 or 6 hours, plus the ones at about 60.01 hours. The last group is matched
// with a narrow range because 60.01 isn't precisely represented.
drop if inlist(hoursfromclose, 6, 60) ///
	| (hoursfromclose > 60.00999 & hoursfromclose < 60.01)

** calculate reporting momentum

// For each cutoff (4, 8, 24 and 48 hours after polls closed), take norm_total
// from the last row at or before the cutoff and spread it to every row of the
// state as maxtotal4, maxtotal8, maxtotal24 and maxtotal48.
// A row counts as the last one at or before the cutoff when the next row is
// past the cutoff OR belongs to another state. The second test is needed
// because [_n+1] looks at the next row of the whole dataset: without it, a
// state whose final row is at or before the cutoff would be compared with the
// first row of the following state and end up with a missing value.
// Assumes rows are grouped by state and ordered by hoursfromclose -- TODO confirm.

foreach hrs of numlist 4 8 24 48 {
	gen total`hrs' = norm_total ///
		if hoursfromclose <= `hrs' & (hoursfromclose[_n+1] > `hrs' | state != state[_n+1])
}

// Separate loop so the variables are created in the same order as before.
foreach hrs of numlist 4 8 24 48 {
	egen maxtotal`hrs' = max(total`hrs'), by(state)
}


** calculate variance from eventual vote margin

// For each cutoff (4, 8, 24 and 48 hours after polls closed), take norm_dempct
// from the last row at or before the cutoff and spread it to every row of the
// state as maxdempct4, maxdempct8, maxdempct24 and maxdempct48.
// A row counts as the last one at or before the cutoff when the next row is
// past the cutoff OR belongs to another state. The second test is needed
// because [_n+1] looks at the next row of the whole dataset: without it, a
// state whose final row is at or before the cutoff would be compared with the
// first row of the following state and end up with a missing value.
// Assumes rows are grouped by state and ordered by hoursfromclose -- TODO confirm.

foreach hrs of numlist 4 8 24 48 {
	gen dempct`hrs' = norm_dempct ///
		if hoursfromclose <= `hrs' & (hoursfromclose[_n+1] > `hrs' | state != state[_n+1])
}

// Separate loop so the variables are created in the same order as before.
foreach hrs of numlist 4 8 24 48 {
	egen maxdempct`hrs' = max(dempct`hrs'), by(state)
}

// Work on a one-row-per-state copy of the data.
preserve
// Keep only the final row of each run of identical state values
// (one row per state when rows are grouped by state).
keep if state ~= state[_n+1]
// Hard-coded 4-hour value for Hawaii.
// NOTE(review): the origin of 1.308403 is not shown in this file -- confirm.
replace maxdempct4 = 1.308403 if state == "hawaii"

// Rank states from the largest to the smallest 4-hour value; order1 is the
// y position of each state in the plot.
gsort -maxdempct4
gen order1 = _n
// The note that used to sit here opened with three slashes, which Stata
// treats as a line-join marker rather than a plain comment, so the egen
// maxrange command below could be swallowed and skipped. A two-slash comment
// avoids that, and these commands no longer need to be run separately.
egen maxrange = rowmax(maxdempct*)
egen minrange = rowmin(maxdempct*)
//gen zero = 0
// x position for the state-name labels. Despite the name the value is -11,
// one unit to the left of the lowest x-axis label (-10).
gen minus10 = -11

// Display label: hyphens become spaces and each word is capitalised.
gen state_lbl = proper(subinstr(state,"-"," ",.))
replace state_lbl = "D.C." if state_lbl == "District Of Columbia"
//order minrange maxrange

// Figure 5: one horizontal line per state.
//   plot 1    spike from minrange to maxrange across the four cutoffs
//   plots 2-5 markers for the 4, 8, 24 and 48 hour values
//   plot 6    invisible markers at x = -11 that carry the state-name labels
// The spike color is spelled gray: Stata has no named color grey, so the
// earlier spelling fell back to the default line color instead.
twoway (pcspike order1 minrange order1 maxrange, lcolor(gray) lwidth(thick)) ///
       (scatter order1 maxdempct4, mcolor(blue) msize(small) msymbol(circle)) ///
       (scatter order1 maxdempct8, mcolor(midblue) msize(small) msymbol(circle)) ///
       (scatter order1 maxdempct24, mcolor(teal) msize(small) msymbol(circle)) ///
       (scatter order1 maxdempct48, mcolor(navy) msize(small) msymbol(circle)) ///
       (scatter order1 minus10, msymbol(none) mlabel(state_lbl) mlabsize(vsmall) mlabposition(9)) ///
       , ///
       ytitle("") ylabel(1(1)51, nolabels) ///
       xtitle(Reported Biden pct. minus final Biden pct.) xlabel(-10(1)4,labsize(small)) xmtick(-10(1)4, grid) ///
       xline(0) ///
       legend(order(2 "4 hours" 3 "8 hours" 4 "24 hours" 5 "48 hours") title(Hours after polls closed)) xsize(4) ysize(6) aspectratio(2)
// Write the figure to disk, then save the per-state data currently in memory.
graph export "figures\fig5.png", replace width(2500)
save "data\scratch\variance", replace
